clear all
set more off


***********************************************************
//// 06-PROCESS_VEHICLE: CONSTRUCT VEHICLE INFORMATION ////
***********************************************************



*******************************************************
/// PART 5A: STORE VEHICLE INFO FROM CITATIONS DATA ///
*******************************************************

use "${temp}/process-citation", clear
keep CitationNumber CountyNum OffenseDate Vehicle*
duplicates drop CitationNumber CountyNum, force 

foreach x of varlist _all {
local RENAME = lower("`x'")
rename `x' `RENAME'
}

* HAVE TO RECAST SO CAN USE SIEVE OPERATION *
recast str50 vehiclemake, force
recast str50 vehiclestyle, force

/// CLEAN UP VEHICLE STRINGS ///
foreach x of varlist vehiclemake-vehiclestate {
egen TEMP_`x' = sieve(`x'), omit(0123456789)
replace `x' = TEMP_`x'
drop TEMP_`x'
replace `x' = trim(upper(`x'))
}

/// KEEP ONLY IF HAVE VEHICLE INFORMATION ///
gen ANY = !mi(vehicleyear)|!mi(vehiclemake)|!mi(vehiclestyle)
keep if ANY==1
drop ANY

/// STORE ///
qui compress
saveold "${temp}/stops-vehicle", replace



***********************************************************
/// PART 5B: HARMONIZE VEHICLE MAKES IN CITATIONS DATA /// 
**********************************************************

//// HOW THIS WORKS ////
//// MAKE-LIST.XLS HAS FIVE COLUMNS WITH CODES FOR THE SAME MAKE ///
//// GOING TO GO THROUGH COLUMNS AND MATCH ON EACH ONE 
//// STORE PREFERRED CODE WHICH IS THE FCC ODE ///
//// E.G. IF THE CITE DATA HAS VEHICLE MAKE AS TOYOTA, WE ARE GOING TO 
//// GET A MATCH AND THEN RECODE TOYOTA to TOYT WHICH IS THE FCC CODE 
//// FINAL STEP IS A FUZZY MATCH ///


/// STEP I IS EXACT MATCH ON FHP CODES ///
import excel using "${raw}/vehicle/make-list.xls", clear first
gen make_MATCH = make_fhp
keep make_MATCH make_fhp
drop if mi(make_MATCH)
tempfile LIST
save `LIST'

/// CITATIONS DATA ///
use "${temp}/stops-vehicle", clear
gen make_MATCH = vehiclemake
keep citationnumber countynum make_MATCH

/// MERGE ///
merge m:1 make_MATCH using `LIST', keep(1 3) 

/// MATCHES ////
gen make_1 = make_fhp if _m==3
gen match_1 = (_m==3)
drop _m

/// STORE ///
keep citationnumber countynum make_1 match_1
qui compress
saveold "${temp}/stops-vehicle-make-1", replace



/// STEP II IS EXACT MATCH ON ALT FHP CODES ///
import excel using "${raw}/vehicle/make-list.xls", clear first
gen make_MATCH = make_fhp_alt
keep make_MATCH make_fhp
drop if mi(make_MATCH)
tempfile LIST
save `LIST'

/// CITATIONS DATA ///
use "${temp}/stops-vehicle", clear
gen make_MATCH = vehiclemake
keep citationnumber countynum make_MATCH

/// MERGE ///
merge m:1 make_MATCH using `LIST', keep(1 3) 

/// MATCHES ////
gen make_2 = make_fhp if _m==3
gen match_2 = (_m==3)
drop _m

/// STORE ///
keep citationnumber countynum make_2 match_2
qui compress
saveold "${temp}/stops-vehicle-make-2", replace



/// STEP III IS EXACT MATCH ON FULL VEHICLE NAMES ///
import excel using "${raw}/vehicle/make-list.xls", clear first
gen make_MATCH = make_price
keep make_MATCH make_fhp
drop if mi(make_MATCH)
tempfile LIST
save `LIST'

/// CITATIONS DATA ///
use "${temp}/stops-vehicle", clear
gen make_MATCH = vehiclemake
keep citationnumber countynum make_MATCH

/// MERGE ///
merge m:1 make_MATCH using `LIST', keep(1 3) 

/// MATCHES ////
gen make_3 = make_fhp if _m==3
gen match_3 = (_m==3)
drop _m

/// STORE ///
keep citationnumber countynum make_3 match_3
qui compress
saveold "${temp}/stops-vehicle-make-3", replace



/// STEP IV IS EXACT MATCH ON FULL NAME ////
import excel using "${raw}/vehicle/make-list.xls", clear first
gen make_MATCH = make_fhp_full
keep make_MATCH make_fhp
drop if mi(make_MATCH)
tempfile LIST
save `LIST'

/// CITATIONS DATA ///
use "${temp}/stops-vehicle", clear
gen make_MATCH = vehiclemake
keep citationnumber countynum make_MATCH

/// MERGE ///
merge m:1 make_MATCH using `LIST', keep(1 3) 

/// MATCHES ////
gen make_4 = make_fhp if _m==3
gen match_4 = (_m==3)
drop _m

/// STORE ///
keep citationnumber countynum make_4 match_4
qui compress
saveold "${temp}/stops-vehicle-make-4", replace



/// STEP V IS EXACT MATCH ON TYPO-CORRECTED ABBREVIATIONS ///
import excel using "${raw}/vehicle/make-list.xls", clear first
gen make_MATCH = make_other
keep make_MATCH make_fhp
drop if mi(make_MATCH)
tempfile LIST
save `LIST'

/// CITATIONS DATA ///
use "${temp}/stops-vehicle", clear
gen make_MATCH = vehiclemake
keep citationnumber countynum make_MATCH

/// MERGE ///
merge m:1 make_MATCH using `LIST', keep(1 3) 

/// MATCHES ////
gen make_5 = make_fhp if _m==3
gen match_5 = (_m==3)
drop _m

/// STORE ///
keep citationnumber countynum make_5 match_5
qui compress
saveold "${temp}/stops-vehicle-make-5", replace



/// STEP VI IS FUZZY MATCH ///
import excel using "${raw}/vehicle/make-list.xls", clear first
gen make_MATCH = make_fhp_full
keep make_MATCH make_fhp
drop if mi(make_MATCH)
* ID FOR RECLINK *
sort make_MATCH
gen idu = _n
* STORE *
tempfile LIST
save `LIST'


//// UNIQUE VALUES FROM BIG DATASET ////
use "${temp}/stops-vehicle", clear
gen make_MATCH = vehiclemake
keep vehiclemake make_MATCH
* CUT DUPLICATES *
drop if mi(make_MATCH)
duplicates drop
* ID FOR RECLINK *
sort make_MATCH
gen idm = _n


//// RECLINK ////
reclink2 make_MATCH using `LIST', gen(score) idm(idm) idu(idu) manytoone

/// NO CHECKING ////
gen make_6 = make_fhp if (_m==3)
gen match_6 = (_m==3)

/// KEEPS ///
keep idm vehiclemake make_6 match_6
keep if match_6 == 1
duplicates tag idm, gen(tag)
drop if tag>0
drop idm tag

// STORE ///
tempfile MATCH
save `MATCH'

// REMERGE ///
use "${temp}/stops-vehicle", clear
keep citationnumber countynum vehiclemake
merge m:1 vehiclemake using `MATCH', nogen
replace match_6 = 0 if mi(match_6)

// STORE //
qui compress
saveold "${temp}/stops-vehicle-make-6", replace



/// COMPILE STEPS INTO CROSSWALK FILE ///
/// PRIORITY BY STEP ABOVE ///

use "${temp}/stops-vehicle", clear
forval i=1/6 {
merge 1:1 citationnumber countynum using "${temp}/stops-vehicle-make-`i'", nogen
}
gen make_code = make_1
forval i=2/6 {
replace make_code = make_`i' if mi(make_code)&!mi(make_`i')
}

//// SAVE CROSSWALK AND DELETE STEPS ///
keep citationnumber countynum make_code
qui compress
saveold "${temp}/stops-vehicle-make-match", replace

forval i=1/6 {
rm "${temp}/stops-vehicle-make-`i'.dta"
}


***********************************************************
/// PART 5C: CONSTRUCT PRICE ESTIMATES FROM TRUECAR /// 
**********************************************************

// CROSSWALK FROM NAMES IN TRUECAR TO NAMES IN FCC //
import excel using "${raw}/vehicle/make-list.xls", clear first
keep make_price make_fhp
tempfile XW
save `XW'

/// ATTACH XW TO PRICE DATA ///
import delim using "${raw}/vehicle/true_car_listings.csv", clear
gen make_price = trim(upper(make))
merge m:1 make_price using `XW', keep(3) nogen
// VAR CLEAN UP //
gen veh_make = make_fhp
gen veh_age = 2017 - year

/// AVERAGE PRICE BY MAKE x AGE CELL ////
collapse (mean) price, by(veh_make veh_age)
ren price veh_price

/// STORE TEMP FILE ///
tempfile PRICE
save `PRICE'



***********************************************************************
/// PART 5D: MATCH FIXED NAMES TO RAW VEHICLE INFO + MATCH PRICES //// 
***********************************************************************

use "${temp}/stops-vehicle", clear

//// CLEAN VEHICLE YEARS ///
//// GENERATE VEHICLE AGE BY SUBTRACTING VEHICLE YEAR FROM YEAR OF STOP
gen veh_year = vehicleyear
replace veh_year = veh_year + 2000 if !mi(vehicleyear) & (vehicleyear <= 18)
replace veh_year = veh_year + 1900 if !mi(vehicleyear) & (vehicleyear >= 70) & (vehicleyear <= 99)
replace veh_year = . if veh_year < 1970 | veh_year > 2018
gen veh_age = year(offensedate) - veh_year
replace veh_age = 0 if veh_age < 0

/// MERGE ON VEHICLE MAKE XW CREATED IN STEP B ///
merge 1:1 citationnumber countynum using "${temp}/stops-vehicle-make-match", nogen
ren make_code veh_make

/// CLEAN UP ///
keep citationnumber countynum veh_make veh_year veh_age
order citationnumber countynum veh_make veh_year veh_age

/// MERGE PRICE ///
merge m:1 veh_make veh_age using `PRICE', keep(1 3) nogen

/// CLEAN UP DATASET ///
label var veh_make "Vehicle Make (FHP Code)"
label var veh_year "Vehicle Year"
label var veh_age "Vehicle Age"
label var veh_price "Average Make x Age Price (TruCar)"

//// FIX LOWER CASE NAMES ///
ren citationnumber CitationNumber
ren countynum CountyNum

//// STORE ////
qui compress
saveold "${temp}/step-3-vehicle", replace




////// DELETE TEMPORARY FILES ///////
rm "${temp}/stops-vehicle.dta"
rm "${temp}/stops-vehicle-make-match.dta"







